This document contains all of our analyses.

# Load the game data set and tidy two comma-delimited columns.
game_df <- read_csv("./data/game_2.csv") %>%
  # Split `release_date` at the comma into `month_day` and `year`.
  # NOTE(review): with sep = "," any space following the comma stays in
  # `year`, and `year` is not converted to a number -- confirm this is intended.
  separate(release_date, into = c("month_day", "year"), sep = ",") %>%
  # Keep only the first listed genre; extra = "drop" makes discarding the
  # remaining pieces explicit instead of relying on the default warning.
  separate(genre, into = "genre", sep = ",", extra = "drop")

1. User score vs Metascore (vs Sales)?

# Interactive scatter plot of Metascore (x) against user score (y); each
# point carries a text label with the game's title and publisher.
game_df %>%
  mutate(text_label = str_c("Title: ", title, "\nPublisher: ", publisher)) %>%
  plot_ly(
    x = ~meta_score,
    y = ~user_score,
    type = "scatter",
    # State the mode explicitly so plotly does not have to pick a default
    # and emit its "No scatter mode" message.
    mode = "markers",
    text = ~text_label
  )
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# User score vs. Metascore with a fitted regression line, one panel per genre.
game_df %>%
  # Keep only genres that have more than three games.
  group_by(genre) %>%
  filter(n() > 3) %>%
  # Drop the grouping so it is not carried into the plotting call.
  ungroup() %>%
  ggscatter(x = "meta_score", y = "user_score", add = "reg.line") +
  facet_wrap(~genre) +
  # Annotate the correlation and the regression equation at fixed positions.
  stat_cor(label.x = 3, label.y = 11) +
  stat_regline_equation(label.x = 3, label.y = 10)
## `geom_smooth()` using formula 'y ~ x'

# User score vs. Metascore with a fitted regression line, one panel per
# platform.
game_df %>%
  # Keep only platforms that have more than three games.
  group_by(platform) %>%
  filter(n() > 3) %>%
  # Drop the grouping so it is not carried into the plotting call.
  ungroup() %>%
  ggscatter(x = "meta_score", y = "user_score", add = "reg.line") +
  facet_wrap(~platform) +
  # Annotate the correlation and the regression equation at fixed positions.
  stat_cor(label.x = 3, label.y = 11) +
  stat_regline_equation(label.x = 3, label.y = 10)
## `geom_smooth()` using formula 'y ~ x'

# Correlation matrix of Metascore, user score and total sales.
game_df %>%
  # Coerce the user score to numeric first, then narrow the data down to the
  # three columns handed to rquery.cormat().
  mutate(user_score = as.numeric(user_score)) %>%
  select(meta_score, user_score, total_sale) %>%
  rquery.cormat()

## $r
##            total_sale meta_score user_score
## total_sale          1                      
## meta_score        0.3          1           
## user_score      0.073       0.52          1
## 
## $p
##            total_sale meta_score user_score
## total_sale          0                      
## meta_score    1.8e-45          0           
## user_score    0.00089   4.1e-141          0
## 
## $sym
##            total_sale meta_score user_score
## total_sale 1                               
## meta_score            1                    
## user_score            .          1         
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1

2. Total sales by genre / Distribution of sales by genre

# Interactive box plots of total sales, one box per genre. Each genre label
# carries its number of games, and genres are ordered by their total sales
# (fct_reorder's default summary).
game_df %>%
  # Count the games per genre while grouped ...
  group_by(genre) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  # ... and only relabel `genre` after ungrouping: dplyr does not allow a
  # grouping variable to be modified inside a grouped mutate().
  mutate(
    genre = str_c(genre, " (n=", count, ")"),
    genre = fct_reorder(genre, total_sale),
    text_label = str_c("Title: ", title, "\nPublisher: ", publisher)
  ) %>%
  plot_ly(
    x = ~total_sale,
    color = ~genre,
    type = "box",
    colors = "viridis",
    text = ~text_label
  )

5. Preference of users of different platforms

6. Preference of users from different regions

7. What factors influence sales? How do they influence it?